# -*- coding: utf-8 -*-
import scrapy

from scrapy_douban.items import DoubanItem


class DoubanSpiderSpider(scrapy.Spider):
    """Spider that crawls the Douban Top 250 movie list, page by page.

    Every ``<li>`` under the ``grid_view`` list becomes one ``DoubanItem``.
    After a page has been processed, the pagination link (if any) is
    requested with :meth:`parse` as its callback.
    """

    name = 'douban_spider'
    allowed_domains = ['movie.douban.com']
    start_urls = ['https://movie.douban.com/top250']

    def parse(self, response):
        """Extract the movies on one list page and schedule the next page.

        Args:
            response: The downloaded list page.

        Yields:
            One ``DoubanItem`` per movie entry, populated with the fields
            ``serial_number``, ``movie_name``, ``introduce``, ``star``,
            ``evaluate`` and ``describe``; then at most one
            ``scrapy.Request`` for the following page.
        """
        for movie in response.xpath('//ol[@class="grid_view"]/li'):
            douban_item = DoubanItem()
            douban_item["serial_number"] = movie.xpath(
                './/div[@class="pic"]/em/text()').extract_first()

            # The title is split over several <span> elements; join them and
            # drop the non-breaking spaces used as separators in the markup.
            title_parts = movie.xpath(
                './/div[@class="hd"]/a/span/text()').extract()
            douban_item["movie_name"] = " ".join(title_parts).replace("\xa0", "")

            # Keep only the non-blank text lines of the description paragraph.
            intro_lines = movie.xpath('.//div[@class="bd"]/p/text()').extract()
            douban_item["introduce"] = [
                str(line).strip().replace("\xa0", "")
                for line in intro_lines
                if str(line).strip() != ""
            ]

            # The first span text is the score, the second the vote count.
            # Evaluate the XPath once and guard the indexes so that a single
            # entry without rating data does not abort the whole page (and
            # with it the request for the next page).
            rating_texts = movie.xpath(
                './/div[@class="bd"]/div/span/text()').extract()
            douban_item["star"] = float(rating_texts[0]) if rating_texts else None
            douban_item["evaluate"] = (
                rating_texts[1] if len(rating_texts) > 1 else None
            )

            douban_item["describe"] = movie.xpath(
                './/div[@class="bd"]/p/span/text()').extract_first()
            yield douban_item

        # Next page. The lookup must run on the response: the loop variable
        # is unbound when the page contains no movie entries.
        next_href = response.xpath(
            '//*[@id="content"]/div/div[1]/div[2]/span[3]/a/@href'
        ).extract_first()
        if next_href:
            # urljoin resolves the href against the page URL, so it works
            # for a query-only href as well as for an absolute one.
            yield scrapy.Request(response.urljoin(next_href), callback=self.parse)


